import urllib.request
import re

# BOOK_UNIT_LINK_PATTERN = '''<div class="bgg"><a href='(.*?)'>(.*?)</a></div>'''
# Regex for one book entry on the list page.  Capture groups, in order:
#   1. the full relative link (``/book/<a>/<b>.html``)
#   2. the first path segment after ``/book/``
#   3. the file name without the ``.html`` suffix
#   4. the anchor text
# Raw string, and the dot before ``html`` is escaped so it only matches a
# literal ``.`` rather than any character.
BOOK_UNIT_LINK_PATTERN = r'''<div class="bgg"><a href='(/book/(.*?)/(.*?)\.html)'>(.*?)</a></div>'''


url = 'http://m.qu.la/booklist/4687.html'


def fetch_raw(page_url, timeout=30):
    """Download *page_url* and return the undecoded response body.

    The body is returned as ``bytes`` so the caller can decode it more than
    once (an HTTP response stream can only be read a single time).

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot hang the script forever.

    Returns:
        The raw response body.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    req = urllib.request.Request(page_url)
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.read()


def extract_book_links(doc):
    """Return the unique book-link matches found in *doc*.

    Args:
        doc: Decoded HTML text of a book-list page.

    Returns:
        A list of 4-tuples ``(link, first_segment, file_stem, anchor_text)``
        as captured by ``BOOK_UNIT_LINK_PATTERN``.  Duplicates are dropped
        and the order of first appearance in *doc* is preserved.
    """
    matches = re.findall(BOOK_UNIT_LINK_PATTERN, doc)
    # dict.fromkeys de-duplicates while keeping insertion order; a plain
    # set() would yield an arbitrary, run-dependent order.
    return list(dict.fromkeys(matches))


def main():
    """Fetch the book list page and print every book link found on it.

    When nothing matches, the page is dumped twice (decoded as GBK and as
    UTF-8, separated by a dashed line) to help diagnose the failure.
    """
    raw = fetch_raw(url)
    doc = raw.decode('gbk', 'ignore')
    alist = extract_book_links(doc)

    if not alist:
        print(doc)
        print('------')
        # Decode the bytes already in memory: re-reading the response would
        # return nothing because the stream has been consumed.
        print(raw.decode('utf8', 'ignore'))
        return

    for item in alist:
        print(item)


if __name__ == '__main__':
    main()